# generate a vector of file names
lists <- list.files("./brain-map-pilot")
# read in all the files
for (i in lists) {
assign(i, read.table(paste0("./brain-map-pilot/",i)))
}
# generate a vector of unique brain areas
unq <- unique(substr(lists, 4, nchar(lists)-2))
# For loop to combine datasets in the same brain area
for (i in unq) {
ind <- lists[grepl(i,lists)] # list datasets that include each unqiue brain area
fa <- sum(substr(ind,1,2) == "FA")
pm <- sum(substr(ind,1,2) == "PM")
dat <- do.call(cbind,lapply(ind, get)) # cbind all the datasets
dat.clean <- dat[,colnames(dat)!="V1"] # remove duplicated gene name columns
rownames(dat.clean) <- dat[,1]
colnames(dat.clean) <- c(paste0("FA_",seq(fa)), paste0("PM_",seq(pm))) # column names
# colnames(dat.clean) <- c(rep("FA",fa), rep("PM",pm)) # column names
sam <- data.frame(condition=c(rep("FA",fa), rep("PM",pm))) # generate sample data
rownames(sam) <- colnames(dat.clean)
assign(i, dat.clean) # assign the combined data into a dataset named after the area
assign(paste0(i,"_sam"), sam) # assign sample data to unique name
rm(sam, fa, pm, i,ind, dat, dat.clean) # remove all unnecessary objects
}
rm(lists, list=lists) # remove all unnecessary objects
# save.image(file="Brain6.Rdata")
# load("Brain6.Rdata")
# generate a function to plot the cell coverage information
cellcov <- function(data, name) {
pZero <- mean(data == 0) # overall proportion of zero counts
libsize <- colSums(data) # overall proportion of zero counts
# find cell coverage
coverage <- colMeans(data > 0)
cell_info <- data.frame(names = colnames(data), coverage = coverage)
# plot cell coverage froms matrix cell_info
ggplot(cell_info, aes(x = names, y = coverage)) + geom_point() + xlab("Cell Name") + ylab("Coverage") + theme_bw() + labs(title = paste0("Cell Coverage (", name, ")")) + theme(plot.title = element_text(hjust = 0.5, size = 15, face = "bold"))
}
# apply for each dataset
pls <- lapply(unq, function(i) cellcov(get(i), i))
# Put all plots in one grid
do.call(grid.arrange, c(pls, nrow = 2))
lowfilt <- function(data) {
countdata <- data
# Generate the CPM values and then filter. Note that by converting to CPMs we are normalising for the different sequencing depths for each sample.
myCPM <- cpm(countdata)
# Which values in myCPM are greater than 0.5?
threshold <- 0.5
thresh <- myCPM > threshold
# This produces a logical matrix with TRUEs and FALSEs head(thresh)
# Summary of how many TRUEs there are in each row.
table(rowSums(thresh))
# we would like to keep genes that have at least 2 TRUES in each row of thresh
keep <- rowSums(thresh) >= 1
# Subset the rows of countdata to keep the more highly expressed genes
counts.keep <- countdata[keep, ]
summary(keep)
return(counts.keep)
# dim(counts.keep)
}
for (i in unq) {
assign(i, lowfilt(get(i)))
}
rle.pca <- function(data, name) {
sam <- get(paste0(name, "_sam"))
set <- newSeqExpressionSet(as.matrix(data), phenoData = sam)
colors <- brewer.pal(ncol(data), "Set2")
op <- par(mfrow = c(1, 2))
plotRLE(set, outline = FALSE, ylim = c(-1.5, 1.5), col = colors[sam$condition])
plotPCA(set, col = colors[sam$condition], cex = 0.5)
mtext(name)
}
# apply for each dataset
invisible(lapply(unq, function(x) rle.pca(get(x), x)))
blnorm <- function(data, name, method) {
sam <- get(paste0(name, "_sam"))
set <- newSeqExpressionSet(as.matrix(data), phenoData = sam)
colors <- brewer.pal(ncol(data), "Set2")
set <- betweenLaneNormalization(set, which = method)
if (method == "upper") {
par(mfrow = c(1, 2))
plotRLE(set, outline = FALSE, ylim = c(-1.5, 1.5), col = colors[sam$condition])
plotPCA(set, col = colors[sam$condition], cex = 0.5)
mtext(paste0("UQ - ", name))
} else {
par(mfrow = c(1, 2))
plotRLE(set, outline = FALSE, ylim = c(-1.5, 1.5), col = colors[sam$condition])
plotPCA(set, col = colors[sam$condition], cex = 0.5)
mtext(paste0("Median - ", name))
}
}
# apply for each dataset
invisible(lapply(unq, function(x) blnorm(get(x), x, "upper")))
invisible(lapply(unq, function(x) blnorm(get(x), x, "median")))
RUVg)If no genes are known a priori not to be influenced by the covariates of interest, one can obtain a set of "in-silico empirical" negative controls, e.g., least significantly DE genes based on a first-pass DE analysis performed prior to RUVg normalization.
bln.set <- function(data, name) {
sam <- get(paste0(name, "_sam"))
set <- newSeqExpressionSet(as.matrix(data), phenoData = sam)
set <- betweenLaneNormalization(set, which = "upper")
return(set)
}
for (i in unq) {
assign(paste0(i, "_set"), bln.set(get(i), i))
}
ecg <- function(data, name) {
sam <- get(paste0(name, "_sam"))
set <- get(paste0(name, "_set"))
design <- model.matrix(~sam$condition, data = pData(set))
y <- DGEList(counts = counts(set), group = sam$condition)
y <- calcNormFactors(y, method = "upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
fit <- glmFit(y, design)
lrt <- glmLRT(fit, coef = 2)
top <- topTags(lrt, n = nrow(set))$table
empirical <- rownames(set)[which(!(rownames(set) %in% rownames(top)[1:5000]))]
return(empirical)
}
for (i in unq) {
assign(paste0(i, "_ecg"), ecg(get(i), i))
}
Here, we consider all but the top 5,000 genes as ranked by edgeR p-values.
ruv.g <- function(name) {
set <- get(paste0(name, "_set"))
empirical <- get(paste0(name, "_ecg"))
set2 <- RUVg(set, empirical, k = 1)
# pData(set2)
}
for (i in unq) {
assign(paste0(i, "_set2"), ruv.g(i))
}
for (i in 1:length(unq)) {
set2 <- get(paste0(unq[i], "_set2"))
sam <- get(paste0(unq[i], "_sam"))
colors <- brewer.pal(nrow(sam), "Set2")
par(mfrow = c(1, 2))
plotRLE(set2, outline = FALSE, ylim = c(-1, 1), col = colors[sam$condition])
plotPCA(set2, col = colors[sam$condition], cex = 0.5)
mtext(unq[i])
}
edgeR packageedger.deg <- function(name) {
require(edgeR)
set2 <- get(paste0(name, "_set2"))
sam <- get(paste0(name, "_set2"))
design <- model.matrix(~condition + W_1, data = pData(set2))
y <- DGEList(counts = counts(set2), group = sam$condition)
y <- calcNormFactors(y, method = "upperquartile")
y <- estimateGLMCommonDisp(y, design)
y <- estimateGLMTagwiseDisp(y, design)
logCPM <- cpm(y, log = T)
fit <- glmFit(y, design)
lrt <- glmLRT(fit, coef = 2)
ruv.result <- lrt$table
top <- topTags(lrt, n = nrow(set2))$table
top$Gene <- rownames(top)
top
}
for (i in unq) {
assign(paste0(i, "_edge"), edger.deg(i))
}
DESeq2 packagedeseq.deg <- function(name) {
set2 <- get(paste0(name, "_set2"))
dds <- DESeqDataSetFromMatrix(countData = counts(set2), colData = pData(set2), design = ~condition + W_1)
featureData <- data.frame(gene = rownames(set2))
mcols(dds) <- DataFrame(mcols(dds), featureData)
# keep <- rowSums(counts(dds)) >= 10 dds <- dds[keep,]
dds$condition <- factor(dds$condition, levels = c("FA", "PM"))
dds <- DESeq(dds)
deseq.result <- results(dds, alpha = 0.999999)
deseq.result <- data.frame(deseq.result)
deseq.result$Gene <- rownames(deseq.result)
deseq.result$logCPM <- aveLogCPM(counts(set2))
deseq.result
}
for (i in unq) {
suppressMessages(assign(paste0(i, "_deseq"), deseq.deg(i)))
}
# save(list=paste0(unq,c('_top','_deseq')), file = 'Brain6_DEG_result.Rdata')
# load('Brain6_DEG_result.Rdata')
filt <- function(name, package, cpmthresh = 1, fcthresh = 1, fdrthresh = 0.05) {
if (package == "edgeR") {
top <- get(paste0(name, "_edge"))
top <- top %>% filter(abs(logFC) > fcthresh, abs(logCPM) > cpmthresh, FDR < fdrthresh)
} else {
top <- get(paste0(name, "_deseq"))
top <- top %>% filter(abs(log2FoldChange) > fcthresh, abs(logCPM) > cpmthresh, padj < fdrthresh)
}
top
}
for (i in unq) {
assign(paste0(i, "_edge_filt_cpm3"), filt(i, "edgeR"))
assign(paste0(i, "_edge_filt_cpm2"), filt(i, "edgeR", cpmthresh = 2))
assign(paste0(i, "_edge_filt_cpm1"), filt(i, "edgeR", cpmthresh = 1))
assign(paste0(i, "_deseq_filt_cpm3"), filt(i, "DESeq2"))
assign(paste0(i, "_deseq_filt_cpm2"), filt(i, "DESEq2", cpmthresh = 2))
assign(paste0(i, "_deseq_filt_cpm1"), filt(i, "DESEq2", cpmthresh = 1))
}
From now on, we will focus on genes with
|logCPM| > 1.
volcanoplot <- function(res, lfcthresh = 2, sigthresh = 0.05, main = "Volcano Plot", legendpos = "bottomright", labelsig = TRUE, textcx = 1, package = "DESeq2", ...) {
if (package == "DESeq2") {
with(res, plot(log2FoldChange, -log10(pvalue), pch = 20, main = main, ...))
with(subset(res, padj < sigthresh), points(log2FoldChange, -log10(pvalue), pch = 20, col = "red", ...))
with(subset(res, abs(log2FoldChange) > lfcthresh), points(log2FoldChange, -log10(pvalue), pch = 20, col = "orange", ...))
with(subset(res, padj < sigthresh & abs(log2FoldChange) > lfcthresh), points(log2FoldChange, -log10(pvalue), pch = 20, col = "green", ...))
if (labelsig) {
require(calibrate)
with(subset(res, padj < sigthresh & abs(log2FoldChange) > lfcthresh), textxy(log2FoldChange, -log10(pvalue), labs = Gene, cex = textcx, ...))
}
legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogFC|>", lfcthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
} else {
with(res, plot(logFC, -log10(PValue), pch = 20, main = main, ...))
with(subset(res, FDR < sigthresh), points(logFC, -log10(PValue), pch = 20, col = "red", ...))
with(subset(res, abs(logFC) > lfcthresh), points(logFC, -log10(PValue), pch = 20, col = "orange", ...))
with(subset(res, FDR < sigthresh & abs(logFC) > lfcthresh), points(logFC, -log10(PValue), pch = 20, col = "green", ...))
if (labelsig) {
require(calibrate)
with(subset(res, FDR < sigthresh & abs(logFC) > lfcthresh), textxy(logFC, -log10(PValue), labs = Gene, cex = textcx, ...))
}
legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogFC|>", lfcthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
}
}
par(mfrow = c(3, 2))
volcanoplot(Cerebellum_edge, package = "edgeR", main = "Volcano Plot edgeR - Cerebellum", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Cortex_edge, package = "edgeR", main = "Volcano Plot edgeR - Cortex", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Hippocampus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hippocampus", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Hypothalamus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hypothalamus", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(RVLM_edge, package = "edgeR", main = "Volcano Plot edgeR - RVLM", xlim = c(-15, 15), ylim = c(-1, 320))
volcanoplot(Striatum_edge, package = "edgeR", main = "Volcano Plot edgeR - Striatum", xlim = c(-15, 15), ylim = c(-1, 320))
par(mfrow = c(3, 2))
volcanoplot(Cerebellum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cerebellum", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Cortex_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cortex", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Hippocampus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hippocampus", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Hypothalamus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hypothalamus", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(RVLM_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - RVLM", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot(Striatum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Striatum", xlim = c(-15, 15), ylim = c(-1, 270))
volcanoplot2 <- function(res, lfcthresh = 1, lcpmthresh = 1, sigthresh = 0.05, main = "Volcano Plot", legendpos = "bottomright", labelsig = TRUE, textcx = 0.8, package = "edgeR", ...) {
if (package == "edgeR") {
with(res, plot(logCPM, -log10(FDR), pch = 20, main = main, ...))
with(subset(res, FDR < sigthresh), points(logCPM, -log10(FDR), pch = 20, col = "red", ...))
with(subset(res, abs(logCPM) > lcpmthresh), points(logCPM, -log10(FDR), pch = 20, col = "orange", ...))
with(subset(res, FDR < sigthresh & abs(logCPM) > lcpmthresh), points(logCPM, -log10(FDR), pch = 20, col = "green", ...))
if (labelsig) {
require(calibrate)
with(subset(res, FDR < sigthresh & abs(logCPM) > lcpmthresh), textxy(logCPM, -log10(FDR), labs = Gene, cex = textcx, ...))
}
legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogCPM|>", lcpmthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
} else if (package == "DESeq2") {
with(res, plot(logCPM, -log10(padj), pch = 20, main = main, ...))
with(subset(res, padj < sigthresh), points(logCPM, -log10(padj), pch = 20, col = "red", ...))
with(subset(res, abs(logCPM) > lcpmthresh), points(logCPM, -log10(padj), pch = 20, col = "orange", ...))
with(subset(res, padj < sigthresh & abs(logCPM) > lcpmthresh), points(logCPM, -log10(padj), pch = 20, col = "green", ...))
if (labelsig) {
require(calibrate)
with(subset(res, padj < sigthresh & abs(logCPM) > lcpmthresh), textxy(logCPM, -log10(padj), labs = Gene, cex = textcx, ...))
}
legend(legendpos, xjust = 1, yjust = 1, legend = c(paste("FDR<", sigthresh, sep = ""), paste("|LogCPM|>", lcpmthresh, sep = ""), "both"), pch = 20, col = c("red", "orange", "green"))
}
}
# par(mfrow = c(2, 3))
# volcanoplot2(Cerebellum_edge, package = "edgeR", main = "Volcano Plot edgeR - Cerebellum", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Cortex_edge, package = "edgeR", main = "Volcano Plot edgeR - Cortex", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hippocampus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hippocampus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hypothalamus_edge, package = "edgeR", main = "Volcano Plot edgeR - Hypothalamus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(RVLM_edge, package = "edgeR", main = "Volcano Plot edgeR - RVLM", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Striatum_edge, package = "edgeR", main = "Volcano Plot edgeR - Striatum", xlim = c(-7, 15), ylim = c(-1, 10))
#
# par(mfrow = c(2, 3))
# volcanoplot2(Cerebellum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cerebellum", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Cortex_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Cortex", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hippocampus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hippocampus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Hypothalamus_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Hypothalamus", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(RVLM_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - RVLM", xlim = c(-7, 15), ylim = c(-1, 10))
# volcanoplot2(Striatum_deseq, package = "DESeq2", main = "Volcano Plot DESEq2 - Striatum", xlim = c(-7, 15), ylim = c(-1, 10))
- Cerebellum, Cortex, Hippocampus, Striatum은 EdgeR에서 더 gene detection이 잘 되고, Hypothalamus와 RVLM은 DESeq2에서 더 잘 되는 것을 볼 수 있습니다.
- 전반적으로 EdgeR이 down-regulated 된 gene을 더 잘 detect하고 있습니다.
- DESeq2 결과물은 up-과 down-regulated gene들의 차이가 매우 큽니다.
logCPM = 1logCPM = 1?The barplot above showed the number of genes expressed by logCPM value. Since we want to take a look at the genes as many as possible, we decided to use
logCPM = 1for further analysis.
deg.sum <- function(name, package= "edge", thresh = 1) {
if (package=="edge") {
top <- get(paste0(name,"_", package))
topfilt <- get(paste0(name,"_", package, "_filt_cpm", thresh))
## entire dataset
t.insig <- top[!top$Gene %in% topfilt$Gene,]
### Up-regulated in PM
t.up <- topfilt %>%
dplyr::filter(logCPM > thresh, FDR< 0.05)
t.down <- topfilt %>%
dplyr::filter(logCPM < -thresh, FDR< 0.05)
t <- data.frame(Insig = nrow(t.insig),
Up = nrow(t.up),
Down = nrow(t.down),
Total = nrow(t.insig) + nrow(t.up) + nrow(t.down)
)
} else {
top <- get(paste0(name,"_", package))
topfilt <- get(paste0(name,"_", package, "_filt_cpm", thresh))
## entire dataset
t.insig <- top[!top$Gene %in% topfilt$Gene,]
### Up-regulated in PM
t.up <- topfilt %>%
dplyr::filter(logCPM > thresh, padj < 0.05)
t.down <- topfilt %>%
dplyr::filter(logCPM < -thresh, padj < 0.05)
t <- data.frame(Insig = nrow(t.insig),
Up = nrow(t.up),
Down = nrow(t.down),
Total = nrow(t.insig) + nrow(t.up) + nrow(t.down)
)
}
t
}
res1_edge <- do.call(rbind,lapply(unq, function(i) deg.sum(i, thresh = 1)))
rownames(res1_edge) <- unq
res1_deseq <- do.call(rbind,lapply(unq, function(i) deg.sum(i, package="deseq",thresh = 1)))
rownames(res1_deseq) <- unq
edgeRknitr::kable(res1_edge, caption = "DE Analysis (using EdgeR package)")
| Insig | Up | Down | Total | |
|---|---|---|---|---|
| Cerebellum | 16241 | 32 | 63 | 16336 |
| Cortex | 16164 | 81 | 68 | 16313 |
| Hippocampus | 15799 | 256 | 108 | 16163 |
| Hypothalamus | 17844 | 12 | 16 | 17872 |
| RVLM | 18310 | 7 | 148 | 18465 |
| Striatum | 15835 | 783 | 296 | 16914 |
DESeq2knitr::kable(res1_deseq, caption = "DE Analysis (using DESeq2 package)")
| Insig | Up | Down | Total | |
|---|---|---|---|---|
| Cerebellum | 16326 | 10 | 0 | 16336 |
| Cortex | 16272 | 41 | 0 | 16313 |
| Hippocampus | 16151 | 12 | 0 | 16163 |
| Hypothalamus | 15252 | 2422 | 198 | 17872 |
| RVLM | 18082 | 331 | 52 | 18465 |
| Striatum | 16769 | 145 | 0 | 16914 |
updownplot <- function(name, thresh = 1, package="edge") {
if (package=="edge") {
x <- get(paste0(name,"_edge_filt_cpm", thresh))
t <- x[order(x$PValue),]
if (any(t$logCPM < 0)) {
barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col= ifelse(t$logCPM > 0, "darkred", "darkblue"), main = paste0(name, " logCPM (lowest to highest p-value)"));abline(v=0)
} else {
barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col="darkred", main = paste0(name, " logCPM (lowest to highest p-value)"))
}
} else {
x <- get(paste0(name,"_deseq_filt_cpm", thresh))
t <- x[order(x$pvalue),]
if (any(t$logCPM < 0)) {
barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col= ifelse(t$logCPM > 0, "darkred", "darkblue"), main = paste0(name, " logCPM (lowest to highest p-value)"));abline(v=0)
} else {
barplot(t$logCPM, names.arg= t$Gene, horiz=T, las=1, col="darkred", main = paste0(name, " logCPM (lowest to highest p-value)"))
}
}
}
edgeRpar(mfrow=c(2,3), mar=c(3,5,3,3))
updownplot("Cerebellum")
updownplot("Cortex")
updownplot("Hippocampus")
updownplot("Hypothalamus")
updownplot("RVLM")
updownplot("Striatum")
DESeq2par(mfrow=c(2,3), mar=c(3,5,3,3))
updownplot("Cerebellum", package="deseq")
updownplot("Cortex", package="deseq")
updownplot("Hippocampus", package="deseq")
updownplot("Hypothalamus", package="deseq")
updownplot("RVLM", package="deseq")
updownplot("Striatum", package="deseq")
compare <- function(name) {
a <- get(paste0(name,"_edge_filt_cpm1"))
b <- get(paste0(name,"_deseq_filt_cpm1"))
a <- a %>%
mutate(updown = ifelse(logCPM > 0, "up","down"), package="edgeR") %>%
arrange(desc(updown)) %>%
dplyr::select(Gene, updown)
b <- b %>%
mutate(updown = ifelse(logCPM > 0, "up","down"), package="DESeq2") %>%
arrange(desc(updown)) %>%
dplyr::select(Gene, updown)
both <- intersect(a$Gene, b$Gene)
edge_only <- setdiff(a$Gene, b$Gene)
deseq_only <- setdiff(b$Gene, a$Gene)
both.df <- a[a$Gene %in% both,]
both.df <- both.df %>% mutate(packageOnly="Both")
edge_only.df <- a[a$Gene %in% edge_only,]
edge_only.df <- edge_only.df %>% mutate(packageOnly="edgeR")
deseq_only.df <- b[b$Gene %in% deseq_only,]
deseq_only.df <- deseq_only.df %>% mutate(packageOnly="DESeq2")
tab <- rbind(both.df, edge_only.df, deseq_only.df)
# knitr::kable(tab)
# a
return(list(tab, name))
}
t <- lapply(unq, compare)
par(mfrow=c(2,3))
invisible(lapply(t, function(x) {
edgeR <- ifelse(x[[1]]$packageOnly %in% c("Both", "edgeR"),1,0)
DESeq2 <- ifelse(x[[1]]$packageOnly %in% c("Both", "DESeq2"),1,0)
c <- cbind(edgeR, DESeq2)
vennDiagram(c);mtext(x[[2]])
}))
# write.csv(t[[1]], file = "Cerebellum_genelist.csv")
# write.csv(t[[2]], file = "Cortex_genelist.csv")
# write.csv(t[[3]], file = "Hippocampus_genelist.csv")
# write.csv(t[[4]], file = "Hypothalamus_genelist.csv")
# write.csv(t[[5]], file = "RVLM_genelist.csv")
# write.csv(t[[6]], file = "Striatum_genelist.csv")
Criteria:
- p-value cutoff: 0.15
- by package (2 options): `edgeR`, `DESeq2`
- by direction (2 options): Up, Down
- by GO Hierarchy 3 Sub-Ontologies: `BP (Biological Process)`, `MF (Molecular Function)`, `CC (Cellular Component)`
Most of the genes had p-values less than 0.1 (or slightly above). Hence, we decided to use the p-value cutoff = 0.15.
goterm <- function(name, package="edge", thresh=1, direction = "up", pcut = 0.1, ontm="BP") {
# universal
t <- get(paste0(name,"_",package))
universe_gene.df <- bitr(rownames(t), fromType = "SYMBOL",
toType = c("ENSEMBL","ENTREZID"),
OrgDb = org.Mm.eg.db)
# upregulated, downregulated genes
t_filt <- get(paste0(name, "_",package,"_filt_cpm",thresh))
if (direction == "up") {
up_gene.df <- t_filt %>%
filter(logCPM > 0) %>%
dplyr::select(Gene, logCPM)
up_bitr <- bitr(up_gene.df$Gene, fromType = "SYMBOL",
toType = c("ENSEMBL","ENTREZID"),
OrgDb = org.Mm.eg.db)
# ego
ego <- enrichGO(gene = up_bitr[,3],
universe = universe_gene.df[,3],
OrgDb = org.Mm.eg.db,
ont = ontm,
pAdjustMethod = "BH",
pvalueCutoff = pcut,
qvalueCutoff = 0.1,
readable = TRUE)
} else {
down_gene.df <- t_filt %>% filter(logCPM < 0) %>% dplyr::select(Gene, logCPM)
down_bitr <- bitr(down_gene.df$Gene, fromType = "SYMBOL",
toType = c("ENSEMBL","ENTREZID"),
OrgDb = org.Mm.eg.db)
# ego
ego <- enrichGO(gene = down_bitr[,3],
universe = universe_gene.df[,3],
OrgDb = org.Mm.eg.db,
ont = ontm,
pAdjustMethod = "BH",
pvalueCutoff = pcut,
qvalueCutoff = 0.1,
readable = TRUE)
}
ego
}
edgeRego_list_up_edge.15 <- lapply(unq, function(x) goterm(x, direction = "up", pcut = 0.15))
dotplot(ego_list_up_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Up, p<0.15, BP)")
dotplot(ego_list_up_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Up, p<0.15, BP)")
dotplot(ego_list_up_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Up, p<0.15, BP)")
dotplot(ego_list_up_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Up, p<0.15, BP)")
dotplot(ego_list_up_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Up, p<0.15, BP)")
dotplot(ego_list_up_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Up, p<0.15, BP)")
ego_list_down_edge.15 <- lapply(unq, function(x) goterm(x, direction = "down", pcut = 0.15))
dotplot(ego_list_down_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Down, p<0.15, BP)")
dotplot(ego_list_down_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Down, p<0.15, BP)")
dotplot(ego_list_down_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Down, p<0.15, BP)")
dotplot(ego_list_down_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Down, p<0.15, BP)")
dotplot(ego_list_down_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Down, p<0.15, BP)")
dotplot(ego_list_down_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Down, p<0.15, BP)")
ego_list_up_edge.15 <- lapply(unq, function(x) goterm(x, direction = "up", pcut = 0.15, ontm="MF"))
dotplot(ego_list_up_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Up, p<0.15, MF)")
dotplot(ego_list_up_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Up, p<0.15, MF)")
dotplot(ego_list_up_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Up, p<0.15, MF)")
dotplot(ego_list_up_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Up, p<0.15, MF)")
dotplot(ego_list_up_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Up, p<0.15, MF)")
dotplot(ego_list_up_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Up, p<0.15, MF)")
ego_list_down_edge.15 <- lapply(unq, function(x) goterm(x, direction = "down", pcut = 0.15, ontm="MF"))
dotplot(ego_list_down_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Down, p<0.15, MF)")
dotplot(ego_list_down_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Down, p<0.15, MF)")
dotplot(ego_list_down_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Down, p<0.15, MF)")
dotplot(ego_list_down_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Down, p<0.15, MF)")
dotplot(ego_list_down_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Down, p<0.15, MF)")
dotplot(ego_list_down_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Down, p<0.15, MF)")
ego_list_up_edge.15 <- lapply(unq, function(x) goterm(x, direction = "up", pcut = 0.15, ontm="CC"))
dotplot(ego_list_up_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Up, p<0.15, CC)")
dotplot(ego_list_up_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Up, p<0.15, CC)")
dotplot(ego_list_up_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Up, p<0.15, CC)")
dotplot(ego_list_up_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Up, p<0.15, CC)")
dotplot(ego_list_up_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Up, p<0.15, CC)")
dotplot(ego_list_up_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Up, p<0.15, CC)")
ego_list_down_edge.15 <- lapply(unq, function(x) goterm(x, direction = "down", pcut = 0.15, ontm="CC"))
dotplot(ego_list_down_edge.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(edgeR, Down, p<0.15, CC)")
dotplot(ego_list_down_edge.15[[2]], orderBy = "p.adjust", title="Cortex\n(edgeR, Down, p<0.15, CC)")
dotplot(ego_list_down_edge.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(edgeR, Down, p<0.15, CC)")
dotplot(ego_list_down_edge.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(edgeR, Down, p<0.15, CC)")
dotplot(ego_list_down_edge.15[[5]], orderBy = "p.adjust", title="RVLM\n(edgeR, Down, p<0.15, CC)")
dotplot(ego_list_down_edge.15[[6]], orderBy = "p.adjust", title="Striatum\n(edgeR, Down, p<0.15, CC)")
DESeq2ego_list_up_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "up", pcut = 0.15))
dotplot(ego_list_up_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Up, p<0.15, BP)")
dotplot(ego_list_up_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Up, p<0.15, BP)")
dotplot(ego_list_up_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Up, p<0.15, BP)")
dotplot(ego_list_up_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Up, p<0.15, BP)")
dotplot(ego_list_up_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Up, p<0.15, BP)")
dotplot(ego_list_up_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Up, p<0.15, BP)")
ego_list_down_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "down", pcut = 0.15))
# dotplot(ego_list_down_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Down, p<0.15, BP)")
# dotplot(ego_list_down_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Down, p<0.15, BP)")
# dotplot(ego_list_down_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Down, p<0.15, BP)")
dotplot(ego_list_down_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Down, p<0.15, BP)")
dotplot(ego_list_down_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Down, p<0.15, BP)")
# dotplot(ego_list_down_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Down, p<0.15, BP)")
ego_list_up_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "up", pcut = 0.15, ontm="MF"))
dotplot(ego_list_up_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Up, p<0.15, MF)")
dotplot(ego_list_up_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Up, p<0.15, MF)")
dotplot(ego_list_up_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Up, p<0.15, MF)")
dotplot(ego_list_up_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Up, p<0.15, MF)")
dotplot(ego_list_up_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Up, p<0.15, MF)")
dotplot(ego_list_up_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Up, p<0.15, MF)")
ego_list_down_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "down", pcut = 0.15, ontm="MF"))
# dotplot(ego_list_down_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Down, p<0.15, MF)")
# dotplot(ego_list_down_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Down, p<0.15, MF)")
# dotplot(ego_list_down_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Down, p<0.15, MF)")
dotplot(ego_list_down_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Down, p<0.15, MF)")
dotplot(ego_list_down_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Down, p<0.15, MF)")
# dotplot(ego_list_down_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Down, p<0.15, MF)")
ego_list_up_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "up", pcut = 0.15, ontm="CC"))
dotplot(ego_list_up_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Up, p<0.15, CC)")
dotplot(ego_list_up_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Up, p<0.15, CC)")
dotplot(ego_list_up_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Up, p<0.15, CC)")
dotplot(ego_list_up_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Up, p<0.15, CC)")
dotplot(ego_list_up_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Up, p<0.15, CC)")
dotplot(ego_list_up_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Up, p<0.15, CC)")
ego_list_down_deseq.15 <- lapply(unq, function(x) goterm(x, package="deseq", direction = "down", pcut = 0.15, ontm="CC"))
# dotplot(ego_list_down_deseq.15[[1]], orderBy = "p.adjust", title="Cerebellum\n(DESeq2, Down, p<0.15, CC)")
# dotplot(ego_list_down_deseq.15[[2]], orderBy = "p.adjust", title="Cortex\n(DESeq2, Down, p<0.15, CC)")
# dotplot(ego_list_down_deseq.15[[3]], orderBy = "p.adjust", title="Hippocampus\n(DESeq2, Down, p<0.15, CC)")
dotplot(ego_list_down_deseq.15[[4]], orderBy = "p.adjust", title="Hypothalamus\n(DESeq2, Down, p<0.15, CC)")
dotplot(ego_list_down_deseq.15[[5]], orderBy = "p.adjust", title="RVLM\n(DESeq2, Down, p<0.15, CC)")
# dotplot(ego_list_down_deseq.15[[6]], orderBy = "p.adjust", title="Striatum\n(DESeq2, Down, p<0.15, CC)")